R Markdown

library(modvarsel)
## 
## Attaching package: 'modvarsel'
## The following objects are masked from 'package:graphics':
## 
##     barplot, boxplot
library(mfe)
## 
## Attaching package: 'mfe'
## The following object is masked from 'package:modvarsel':
## 
##     cv_bandwidth
## The following object is masked from 'package:stats':
## 
##     rf
data <- as.matrix(read.csv('mfe/data/indicateurs.csv', header = TRUE, sep = ';', dec = ','))
# #script R pour creer les modeles
# choiceModels <- list()
# PI = list(EI = 1:3,
#           QC = 4:10,
#           QN = 11:17,
#           QS = 18:24)
# 
# for (i in 1:24){
#   if (i %in% PI$EI){
#     res <- choicemod(X = data[,-PI$EI], Y = data[, i], method = c("linreg", "sir", "rf", "plsr", "ridge"), N = 50, nperm = 100)
#   }
# 
#   if (i %in% PI$EI){
#     res <- choicemod(X = data[,-PI$EI], Y = data[, i], method = c("linreg", "sir", "rf", "plsr", "ridge"), N = 50, nperm = 100)
#   }
# 
#   if (i %in% PI$EI){
#     res <- choicemod(X = data[,-PI$EI], Y = data[, i], method = c("linreg", "sir", "rf", "plsr", "ridge"), N = 50, nperm = 100)
#   }
# 
#   if (i %in% PI$EI){
#     res <- choicemod(X = data[,-PI$EI], Y = data[, i], method = c("linreg", "sir", "rf", "plsr", "ridge"), N = 50, nperm = 100)
#   }
#   choiceModels[[i]] <- res
#   save(choiceModels, file = '../data/choiceModels2.RData')
# }
#select and save the model in a list
set.seed(123)
load('mfe/data/choiceModelsFinal.RData')
models <- list()

for (i in 1:24){
  #load modeles
  Ylabel <- colnames(data)[i]
  print(paste('Etude des modeles de prediction de:', Ylabel, sep = ' '))
  head(choiceModels[[i]]$mse)
  head(choiceModels[[i]]$mse_all)
  head(choiceModels[[i]]$r2)
  
  #selection of the best model on the R2
  perf_moy <- apply(X = choiceModels[[i]]$mse, MARGIN = 2, FUN = mean, na.rm = TRUE)
  sel_model <-  which(perf_moy == min(perf_moy))
  
  #plot the best method ----
  #RMSE
  boxplot(choiceModels[[i]], method = c("linreg", "sir", 'rf', "plsr", "ridge"), 
          type = "both", col = rgb(0,1,0,0.2), las = 2,
          main = "N = 50 replications")
  
  
  
  ######plot the variables selected for each model ----
  ##linreg
  #varaible selected
  barplot(choiceModels[[i]], method="linreg", type="varsel", las = 2, 
          main = "linreg", col = rgb(1,1,0,0.2))
  
  #total use of each variable in all the simulation
  barplot(choiceModels[[i]], method="linreg", type="sizemod", las = 2, 
          main = "linreg", col = rgb(0,1,1,0.2))
  
  ##sir
  #varaible selected
  barplot(choiceModels[[i]], method="sir", type="varsel", las = 2, 
          main = "sir", col = rgb(1,1,0,0.2))
  
  #total use of each variable in all the simulation
  barplot(choiceModels[[i]], method="sir", type="sizemod", las = 2, 
          main = "sir", col = rgb(0,1,1,0.2))
  
  ##rf
  #varaible selected
  barplot(choiceModels[[i]], method="rf", type="varsel", las = 2, 
          main = "rf", col = rgb(1,1,0,0.2))
  
  #total use of each variable in all the simulation
  barplot(choiceModels[[i]], method="rf", type="sizemod", las = 2, 
          main = "rf", col = rgb(0,1,1,0.2))
  
  ##plsr
  #varaible selected
  barplot(choiceModels[[i]], method="plsr", type="varsel", las = 2, 
          main = "plsr", col = rgb(1,1,0,0.2))
  
  #total use of each variable in all the simulation
  barplot(choiceModels[[i]], method="plsr", type="sizemod", las = 2, 
          main = "plsr", col = rgb(0,1,1,0.2))
  
  ##ridge
  #varaible selected
  barplot(choiceModels[[i]], method="ridge", type="varsel", las = 2, 
          main = "ridge", col = rgb(1,1,0,0.2))
  
  #total use of each variable in all the simulation
  barplot(choiceModels[[i]], method="ridge", type="sizemod", las = 2, 
          main = "ridge", col = rgb(0,1,1,0.2))
  
    #Selection of the variable with the importance
  Y <- data[, i]
  X <- data[,which(colnames(data) %in% rownames(choiceModels[[i]]$pvarsel))]
  methode <- c('linreg', 'sir', 'rf', 'plsr', 'ridge')[sel_model]
  print(paste('choix methode:', methode, sep = ' '))
  
  #etude de l'importance des variables
  imp <- varimportance(X = X, Y = Y, method = methode, nperm = 100)
  meanvi <- colMeans(imp$mat_imp) # mean importance
  knitr::kable(t(meanvi),digits=2)
  knitr::kable(imp$base_imp, digit=2) # baseline value of importance
  
  plot(imp,choice="boxplot", col="lightgreen")
  plot(imp,choice="meanplot", cutoff = TRUE)
  
  
  sel <- which(colnames(data) %in% select(imp, cutoff=TRUE)$var)
  Xnew <- data[, sel]

  
  #model final -----
  if (methode == 'linreg'){
    
    models[[i]] <- reg_lm(X = Xnew, Y = Y, Ylabel = Ylabel)

    print(summary(models[[i]]))
    
    #prediction of the input data
    y_pred <- predict(models[[i]], Xnew)
    
  }
  
  if (methode == 'sir'){
    
    models[[i]] <- sir(X = Xnew, Y = Y, Ylabel = Ylabel)
    
    #visualize ebouli plot
    plot(models[[i]], choice = 'eigenValue')
   
    #visualize the performance of the non-parametric estimation
    plot(models[[i]], choice = 'smoothing')
    
    # Prediction of the input data
    y_pred <- predict(models[[i]], Xnew)
    
  
  }
  
  if (methode == 'rf'){
    
    models[[i]] <- rf(X = Xnew, Y = Y, Ylabel = Ylabel)
    
    #prediction of the input data
    y_pred <- predict(models[[i]], Xnew)
    
 
  }
  
  if (methode == 'ridge'){
    
    models[[i]] <- ridge(X = Xnew, Y = Y, Ylabel = Ylabel)

    print(summary(models[[i]]))
    
    #prediction of the input data
    y_pred <- predict(models[[i]], Xnew)
    
  }
}
## [1] "Etude des modeles de prediction de: PV"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda     99     -none- numeric  
## cvm        99     -none- numeric  
## cvsd       99     -none- numeric  
## cvup       99     -none- numeric  
## cvlo       99     -none- numeric  
## nzero      99     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric  
## Xlabels     2     -none- character
## Ylabel      1     -none- character
## y_train    30     -none- numeric  
## x_train    60     -none- numeric  
## [1] "Etude des modeles de prediction de: GMQ"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda      97    -none- numeric  
## cvm         97    -none- numeric  
## cvsd        97    -none- numeric  
## cvup        97    -none- numeric  
## cvlo        97    -none- numeric  
## nzero       97    -none- numeric  
## name         1    -none- character
## glmnet.fit  12    elnet  list     
## lambda.min   1    -none- numeric  
## lambda.1se   1    -none- numeric  
## Xlabels      6    -none- character
## Ylabel       1    -none- character
## y_train     30    -none- numeric  
## x_train    180    -none- numeric  
## [1] "Etude des modeles de prediction de: Eff_alim"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda     98     -none- numeric  
## cvm        98     -none- numeric  
## cvsd       98     -none- numeric  
## cvup       98     -none- numeric  
## cvlo       98     -none- numeric  
## nzero      98     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric  
## Xlabels     3     -none- character
## Ylabel      1     -none- character
## y_train    30     -none- numeric  
## x_train    90     -none- numeric  
## [1] "Etude des modeles de prediction de: col_C"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda      98    -none- numeric  
## cvm         98    -none- numeric  
## cvsd        98    -none- numeric  
## cvup        98    -none- numeric  
## cvlo        98    -none- numeric  
## nzero       98    -none- numeric  
## name         1    -none- character
## glmnet.fit  12    elnet  list     
## lambda.min   1    -none- numeric  
## lambda.1se   1    -none- numeric  
## Xlabels      7    -none- character
## Ylabel       1    -none- character
## y_train     30    -none- numeric  
## x_train    210    -none- numeric  
## [1] "Etude des modeles de prediction de: pH"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda     99     -none- numeric  
## cvm        99     -none- numeric  
## cvsd       99     -none- numeric  
## cvup       99     -none- numeric  
## cvlo       99     -none- numeric  
## nzero      99     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric  
## Xlabels     2     -none- character
## Ylabel      1     -none- character
## y_train    30     -none- numeric  
## x_train    60     -none- numeric  
## [1] "Etude des modeles de prediction de: Qte_gras"

## [1] "choix methode: rf"

## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.

## [1] "Etude des modeles de prediction de: Prop_gras"

## [1] "choix methode: rf"

## [1] "Etude des modeles de prediction de: poids_carc"

## [1] "choix methode: linreg"

## 
## Call:
## lm(formula = Y ~ ., data = as.data.frame(X))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.8952  -4.9275  -0.5383   4.6608  13.4423 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.833e-09  1.316e+00   0.000    1e+00    
## PV           6.095e-01  2.374e-02  25.673   <2e-16 ***
## GMQ         -1.996e+01  4.643e+00  -4.299    2e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.207 on 27 degrees of freedom
## Multiple R-squared:  0.9621, Adjusted R-squared:  0.9592 
## F-statistic: 342.3 on 2 and 27 DF,  p-value: < 2.2e-16
## 
## [1] "Etude des modeles de prediction de: confo"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda      97    -none- numeric  
## cvm         97    -none- numeric  
## cvsd        97    -none- numeric  
## cvup        97    -none- numeric  
## cvlo        97    -none- numeric  
## nzero       97    -none- numeric  
## name         1    -none- character
## glmnet.fit  12    elnet  list     
## lambda.min   1    -none- numeric  
## lambda.1se   1    -none- numeric  
## Xlabels      7    -none- character
## Ylabel       1    -none- character
## y_train     30    -none- numeric  
## x_train    210    -none- numeric  
## [1] "Etude des modeles de prediction de: col_lumi"

## [1] "choix methode: rf"

## [1] "Etude des modeles de prediction de: ratio_C160_C180"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda      93    -none- numeric  
## cvm         93    -none- numeric  
## cvsd        93    -none- numeric  
## cvup        93    -none- numeric  
## cvlo        93    -none- numeric  
## nzero       93    -none- numeric  
## name         1    -none- character
## glmnet.fit  12    elnet  list     
## lambda.min   1    -none- numeric  
## lambda.1se   1    -none- numeric  
## Xlabels      6    -none- character
## Ylabel       1    -none- character
## y_train     30    -none- numeric  
## x_train    180    -none- numeric  
## [1] "Etude des modeles de prediction de: lipide_tnr"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda      98    -none- numeric  
## cvm         98    -none- numeric  
## cvsd        98    -none- numeric  
## cvup        98    -none- numeric  
## cvlo        98    -none- numeric  
## nzero       98    -none- numeric  
## name         1    -none- character
## glmnet.fit  12    elnet  list     
## lambda.min   1    -none- numeric  
## lambda.1se   1    -none- numeric  
## Xlabels     16    -none- character
## Ylabel       1    -none- character
## y_train     30    -none- numeric  
## x_train    480    -none- numeric  
## [1] "Etude des modeles de prediction de: AGMI_vs_AGPI"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda      98    -none- numeric  
## cvm         98    -none- numeric  
## cvsd        98    -none- numeric  
## cvup        98    -none- numeric  
## cvlo        98    -none- numeric  
## nzero       98    -none- numeric  
## name         1    -none- character
## glmnet.fit  12    elnet  list     
## lambda.min   1    -none- numeric  
## lambda.1se   1    -none- numeric  
## Xlabels     16    -none- character
## Ylabel       1    -none- character
## y_train     30    -none- numeric  
## x_train    480    -none- numeric  
## [1] "Etude des modeles de prediction de: AGL"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda     96     -none- numeric  
## cvm        96     -none- numeric  
## cvsd       96     -none- numeric  
## cvup       96     -none- numeric  
## cvlo       96     -none- numeric  
## nzero      96     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric  
## Xlabels     2     -none- character
## Ylabel      1     -none- character
## y_train    30     -none- numeric  
## x_train    60     -none- numeric  
## [1] "Etude des modeles de prediction de: AGtr"

## [1] "choix methode: sir"

## [1] "Etude des modeles de prediction de: n6_sur_n3"

## [1] "choix methode: rf"

## [1] "Etude des modeles de prediction de: CLA"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda      98    -none- numeric  
## cvm         98    -none- numeric  
## cvsd        98    -none- numeric  
## cvup        98    -none- numeric  
## cvlo        98    -none- numeric  
## nzero       98    -none- numeric  
## name         1    -none- character
## glmnet.fit  12    elnet  list     
## lambda.min   1    -none- numeric  
## lambda.1se   1    -none- numeric  
## Xlabels      4    -none- character
## Ylabel       1    -none- character
## y_train     30    -none- numeric  
## x_train    120    -none- numeric  
## [1] "Etude des modeles de prediction de: tendrete"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda     91     -none- numeric  
## cvm        91     -none- numeric  
## cvsd       91     -none- numeric  
## cvup       91     -none- numeric  
## cvlo       91     -none- numeric  
## nzero      91     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric  
## Xlabels     2     -none- character
## Ylabel      1     -none- character
## y_train    30     -none- numeric  
## x_train    60     -none- numeric  
## [1] "Etude des modeles de prediction de: jutosite"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda     97     -none- numeric  
## cvm        97     -none- numeric  
## cvsd       97     -none- numeric  
## cvup       97     -none- numeric  
## cvlo       97     -none- numeric  
## nzero      97     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric  
## Xlabels     3     -none- character
## Ylabel      1     -none- character
## y_train    30     -none- numeric  
## x_train    90     -none- numeric  
## [1] "Etude des modeles de prediction de: amere"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda     89     -none- numeric  
## cvm        89     -none- numeric  
## cvsd       89     -none- numeric  
## cvup       89     -none- numeric  
## cvlo       89     -none- numeric  
## nzero      89     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric  
## Xlabels     2     -none- character
## Ylabel      1     -none- character
## y_train    30     -none- numeric  
## x_train    60     -none- numeric  
## [1] "Etude des modeles de prediction de: rance_poisson"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda     73     -none- numeric  
## cvm        73     -none- numeric  
## cvsd       73     -none- numeric  
## cvup       73     -none- numeric  
## cvlo       73     -none- numeric  
## nzero      73     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric  
## Xlabels     2     -none- character
## Ylabel      1     -none- character
## y_train    30     -none- numeric  
## x_train    60     -none- numeric  
## [1] "Etude des modeles de prediction de: gras_vs_metal"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda      86    -none- numeric  
## cvm         86    -none- numeric  
## cvsd        86    -none- numeric  
## cvup        86    -none- numeric  
## cvlo        86    -none- numeric  
## nzero       86    -none- numeric  
## name         1    -none- character
## glmnet.fit  12    elnet  list     
## lambda.min   1    -none- numeric  
## lambda.1se   1    -none- numeric  
## Xlabels      5    -none- character
## Ylabel       1    -none- character
## y_train     30    -none- numeric  
## x_train    150    -none- numeric  
## [1] "Etude des modeles de prediction de: sang_acide"

## [1] "choix methode: ridge"

##            Length Class  Mode     
## lambda     94     -none- numeric  
## cvm        94     -none- numeric  
## cvsd       94     -none- numeric  
## cvup       94     -none- numeric  
## cvlo       94     -none- numeric  
## nzero      94     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric  
## Xlabels     2     -none- character
## Ylabel      1     -none- character
## y_train    30     -none- numeric  
## x_train    60     -none- numeric  
## [1] "Etude des modeles de prediction de: intense_flav_sucre"

## [1] "choix methode: rf"

# save(models, file = '../data/models2.RData')


#add under models
for (i in 1:length(models)){
  if (class(models[[i]]) == 'reg_lm')  models[[i]]$all_models <- underModel.reg_lm(models[[i]], B = 100)
  
  if (class(models[[i]]) == 'rf')  models[[i]]$all_models <- underModel.rf(models[[i]], B = 100)
  
  if (class(models[[i]]) == 'sir')  models[[i]]$all_models <- underModel.sir(models[[i]], B = 100)
}


save(models, file = 'mfe/data/modelsFifi.RData')